In [3]:
from numpy.linalg import inv
class Kalman(object):
    '''Implements a Bayesian Kalman filter with a one-dimensional observation,
    following the probabilistic treatment of Murphy, Sec. 18.3 (p. ~641).
    dim_z is the number of measurement inputs.
Attributes
----------
    mu : numpy.array(dim_mu, 1)
        State estimate vector
    sigma : numpy.array(dim_mu, dim_mu)
        State covariance matrix
    A : numpy.array(dim_mu, dim_mu)
        State transition matrix
    B : numpy.array(dim_mu, dim_u)
        Control transition matrix
    C : numpy.array(dim_z, dim_mu)
        Measurement (observation) matrix
    D : numpy.array(dim_z, dim_u)
        Control observation matrix
    R : numpy.array(dim_z, dim_z)
        Measurement noise covariance matrix
    Q : numpy.array(dim_mu, dim_mu)
        Process noise covariance matrix
    S : numpy.array(dim_z, dim_z)
        Innovation covariance estimate; initialized to R
'''
def __init__(self, mu_0, sigma_0, A, B, C, D, Q, R):
'''
Parameters
----------
        mu_0 : numpy.array(dim_mu, 1)
            Initial state estimate vector
        sigma_0 : numpy.array(dim_mu, dim_mu)
            Initial state covariance matrix
        A : numpy.array(dim_mu, dim_mu)
            State transition matrix
        B : numpy.array(dim_mu, dim_u)
            Control transition matrix
        C : numpy.array(dim_z, dim_mu)
            Measurement (observation) matrix
        D : numpy.array(dim_z, dim_u)
            Control observation matrix
        Q : numpy.array(dim_mu, dim_mu)
            Process noise covariance matrix
        R : numpy.array(dim_z, dim_z)
            Measurement noise covariance matrix
'''
self.A = A # Parameter matrix A
self.B = B # Parameter matrix B
self.C = C # Parameter matrix C
self.D = D # Parameter matrix D
        self.Q = Q # Process noise covariance matrix
        self.R = R # Observation noise covariance matrix
        self.S = self.R # Innovation covariance estimate; initialized to R
self.mu = mu_0 # Initial state estimate
self.sigma = sigma_0 # Initial state covariance
def predict(self, u=None):
''' Murphy Sec. 18.3.1.1'''
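        # Prediction step: mu <- A*mu + B*u ; sigma <- A*sigma*A^T + Q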
# Predicted state covariance
self.sigma = np.dot(np.dot(self.A, self.sigma), self.A.T) + self.Q
# if there is no control input do not include it
if u is None:
self.mu = np.dot(self.A, self.mu) # Predict state mean
else:
            self.mu = np.dot(self.A, self.mu) + np.dot(self.B, u) # Predict state mean with control input
def update(self, Y):
'''
        Add a new measurement Y to the Kalman filter (Murphy Sec. 18.3.1.2).
        The caller is responsible for skipping missing (None/NaN) measurements.
        Parameters
        ----------
        Y : np.array(dim_z)
            Measurement for this update. Y can be a scalar if dim_z is 1,
            otherwise it must be a column vector.
'''
self.y = np.dot(self.C,self.mu) # Posterior predictive mean
r = Y - self.y # residual
        S = np.dot(np.dot(self.C, self.sigma), self.C.T) + self.R # Innovation covariance
K = np.dot(np.dot(self.sigma, self.C.T), inv(S)) # Kalman Gain
# Correct the state covariance and mean
self.mu = self.mu + np.dot(K, r)
I_KC = np.identity(len(self.mu)) - np.dot(K,self.C)
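        # Joseph-form covariance update, (I-KC) sigma (I-KC)^T + K R K^T, which
        # preserves symmetry and positive semi-definiteness better than (I-KC) sigma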
self.sigma = np.dot(np.dot(I_KC, self.sigma), I_KC.T) + np.dot(np.dot(K,self.R), K.T)
# Update the class attribute values
self.K = K
self.S = S
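A quick sanity check of the Kalman class on synthetic data; this is a minimal sketch with made-up constant-velocity values (nothing here comes from the snowdepth dataset):
In [ ]:
import numpy as np
# Hypothetical toy problem: track a ramp (constant velocity) from noisy observations.
dt = 1.
A = np.array([[1., dt],
              [0., 1.]]) # Position advances by velocity*dt
B = np.zeros((2, 2))     # Unused control model
C = np.array([[1., 0.]]) # Observe position only
Q = np.diag((1e-3, 1e-4))
R = np.array([[0.5]])
kf = Kalman(np.array([0., 0.]), np.diag((1., 1.)), A, B, C, None, Q, R)
np.random.seed(0)
truth = 0.3 * np.arange(200)
obs = truth + np.random.normal(0., np.sqrt(R[0, 0]), len(truth))
for z in obs[1:]:
    kf.predict()
    kf.update(z)
print kf.mu # Should be close to [truth[-1], 0.3]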
In [4]:
def EstimateObservationNoise(sensor_number, start_obs, end_obs, plot=False):
'''
    Estimate the Gaussian observation noise over a window of observations: subtract
    the mean and compute the variance. NaNs are omitted.
Parameters
----------
sensor_number : integer
integer index of sensor in pandas dataframe.
start_obs : integer
integer index of starting observation
end_obs : integer
integer index of ending observation
    plot : bool (default False)
        If True, plot the histogram and Gaussian estimator.
Returns
----------
    variance : float
        Variance of the observation set.
'''
df = pd.read_pickle('../output/cluster_0_cleaned.pickle')
# Observations
Y = df['snowdepth_%i'%sensor_number].values
obs = Y[start_obs:end_obs]
obs = obs[~np.isnan(obs)]
obs -= np.mean(obs)
    var = np.var(obs)
bins = np.linspace(-100,100,50)
if plot:
plt.hist(obs, bins=bins, histtype='step', normed=True, label='Sensor %i'%sensor_number)
        gaus = np.exp(-bins**2/(2*var))
        gaus = gaus / (np.sum(gaus)*(bins[1]-bins[0])) # Normalize the discretized pdf
#plt.plot(bins, gaus)
plt.legend(frameon=False)
plt.xlabel('Scatter [mm]')
return var
# What range of observations are we using to estimate the noise?
start_obs=2000
stop_obs=3000
for i_sensor in range(1,11):
plt.subplot(211)
if i_sensor > 5:
plt.subplot(212)
EstimateObservationNoise(i_sensor, start_obs, stop_obs, plot=True)
plt.text(.05, .82, 'Obs:%i-%i'%(start_obs, stop_obs), transform=plt.gca().transAxes)
plt.show()
# Uncomment to plot sensor 9 over the time period of interest.
# df = pd.read_pickle('../output/cluster_0_cleaned.pickle')
# Y = df['snowdepth_%i'%9].values
# plt.plot(range(start_obs, stop_obs),Y[start_obs:stop_obs])
In [38]:
import pandas as pd
import time
def FilterSnowdepth(sensor_number, obs_noise, system_noise, outlier_threshold=2e3):
'''
Run a forward Kalman filter through a single snowdepth sensor time series.
Parameters
----------
sensor_number : integer
integer index of sensor in pandas dataframe.
    obs_noise : np.array(dim_z, dim_z)
        Observation noise covariance matrix; a scalar if dim_z=1 (the observation dimension).
    system_noise : np.array(dim_mu, dim_mu)
        System (process) noise covariance matrix, where dim_mu is the state dimension.
    outlier_threshold : float
        Distance threshold in mm beyond which a point is considered an outlier and
        is not used in the measurement update. This needs to be large enough that
        real baseline shifts are not rejected.
Returns
----------
    mu_list : np.array(N_obs, dim_mu)
        The updated state vector for each observation.
    sigma_list : np.array(N_obs)
        The projected measurement (innovation) covariance at each observation.
'''
df = pd.read_pickle('../output/cluster_0_cleaned.pickle')
# Observations
Y = df['snowdepth_%i'%sensor_number].values
    # Estimate the initial baseline using the median of observations 2000-3000, excluding NaNs
baseline = np.median(Y[2000:3000][~np.isnan(Y[2000:3000])])
# Label the state parameters.
state_params=['depth_1', 'velocity_1', 'baseline_1']
    # First observation that is not NaN (extracted as a scalar)
    Y0 = Y[np.argwhere(~np.isnan(Y))[0][0]]
sigma_0 = np.diag((50, 10, 10))
    mu_0 = np.array([-Y0+baseline, 0., baseline]) # Initial state: depth implied by the first observation, zero velocity, baseline
dt = .25 # 15 minute intervals. Velocity in mm/hr
# Transition Matrix
A = np.array([[1, dt, 0],
[0, 1, 0],
[0, 0, 1]])
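    # Dynamics: depth_{t+1} = depth_t + dt*velocity_t; velocity and baseline are
    # constant apart from the random-walk process noise supplied in Q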
# Control Model
B = np.zeros((len(mu_0),len(mu_0)))
# Observation Matrix
C = np.array([[-1, 0, +1],])
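    # Observation model: Y = baseline - depth, i.e. the sensor reports the distance
    # down to the snow surface, hence the [-1, 0, +1] row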
# Process noise.
Q = system_noise
# Observation Noise
R = obs_noise
# For now, no control input
u = None
D = None
K = Kalman(mu_0, sigma_0, A, B, C, D, Q, R)
sigma_list = np.zeros(len(Y))
mu_list = np.zeros((len(Y),len(mu_0)))
#diffs = np.zeros(len(Y)) # Debugging
# Initial values
mu_list[0,:] = mu_0
sigma_list[0] = obs_noise
print 'Processing sensor %i'%sensor_number
start = time.time()
for i_obs in range(1, len(Y)):
K.predict()
        # Only update the state if we have a valid measurement
        # and it is not an obvious outlier (default threshold is a change of >2 meters)
        difference = np.abs((Y[i_obs]-np.dot(K.C,K.mu)))
        # diffs[i_obs] = difference # Debugging
        if not np.isnan(Y[i_obs]) and (difference < outlier_threshold):
K.update(Y[i_obs])
mu_list[i_obs] = K.mu
sigma_list[i_obs] = K.S
if (i_obs%500)==0:
print '\r Forward pass on observation %i of %i'%(i_obs,len(Y)),
print '\n Completed Forward Pass in %1.2f s'%(time.time()-start)
return mu_list, sigma_list
In [47]:
results = []
for i_sensor in range(1,11):
# Estimate the observation variance initially.
start_obs=2000
stop_obs=3000
obs_noise = EstimateObservationNoise(i_sensor, start_obs, stop_obs)
# Run the Kalman Filter
results.append(FilterSnowdepth(i_sensor, obs_noise=obs_noise, system_noise=np.diag((1e0,1e-2,1e-3))))
In [48]:
plt.figure(figsize=(6,8))
for i_sensor, (mu, sigma) in enumerate(results):
# This sensor went out after not very long
# if i_sensor==1:
# continue
# Load the observations for plotting
df = pd.read_pickle('../output/cluster_0_cleaned.pickle')
Y = df['snowdepth_%i'%(i_sensor+1)].values
plt.subplot(311)
# Plot the snowdepth
plt.plot(mu[:,0], label='Sensor %i'%(i_sensor+1))
#plt.plot(mu[:,0]+sigma, label='Sensor %i'%(i_sensor+1), color='firebrick')
#plt.plot(mu[:,0]-sigma, label='Sensor %i'%(i_sensor+1), color='firebrick')
plt.xlabel('Observation')
plt.ylabel('Snowdepth $d$ [mm]')
plt.grid(linestyle='-', alpha=.15)
#plt.legend(loc=2, ncol=2, frameon=False, columnspacing=.2, labelspacing=.2)
plt.ylim(-100, 3000)
plt.ylim(600, 750)
plt.xlim(14400, 14800)
df = pd.read_pickle('../output/cluster_0_cleaned.pickle')
# Observations
Y = df['snowdepth_4'].values
    # Estimate the initial baseline using the median of observations 2000-3000, excluding NaNs
baseline = np.median(Y[2000:3000][~np.isnan(Y[2000:3000])])
plt.scatter(range(len(Y)), -Y+baseline, s=.5, color='k', marker='o')
# Can also plot uncertainty bands, though these are not great yet.
plt.fill_between(range(len(mu[:,0])), mu[:,0]-np.sqrt(sigma), mu[:,0]+np.sqrt(sigma), alpha=.1, color='m')
# -------------------------------
# Plot the velocity parameter
plt.subplot(312)
plt.plot(mu[:,1], alpha=.3)
plt.xlabel('Observation')
plt.ylabel('$\dot{d}$ [mm/hr]')
plt.ylim(-10,25)
plt.grid(linestyle='-', alpha=.15)
# -------------------------------
# Plot the baseline
plt.subplot(313)
plt.plot(mu[:,2], alpha=.7)
plt.xlabel('Observation')
plt.ylabel('$b(t)$ [mm]')
plt.ylim(3.5e3,4.5e3)
plt.grid(linestyle='-', alpha=.15)
In [259]:
def GenerateStepControl(Y, window_size):
'''
    Returns the convolution of a centered step kernel with the input observation array Y.
'''
step = np.zeros(2*window_size)
step[:window_size] = -1
step[window_size:] = 1
return np.convolve(step, Y, mode='full')/(2*window_size)
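# Quick check on a synthetic step (illustrative values, not sensor data): the
# response magnitude peaks at the jump, offset by the window. Uncomment to try:
# x_test = np.concatenate([np.zeros(200), 100.*np.ones(200)])
# print np.argmax(np.abs(GenerateStepControl(x_test, 50))) # ~249 = step index + window_size - 1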
window_size = 50
plt.figure(figsize=(10,8))
steps = []
for i_sensor in range(1,11):
# Note this is the cleaned and filled dataset.
df = pd.read_pickle('../output/cluster_0_cleaned_filled.pickle')
Y = df['snowdepth_%i'%(i_sensor)].values
plt.subplot(311)
plt.plot(Y)
#plt.ylim(2500,3500)
conv = GenerateStepControl(Y, window_size=window_size)
steps.append(conv)
plt.subplot(312)
plt.plot(conv, label='Sensor %i'%i_sensor)
plt.ylabel('Step Response')
plt.ylim(-200,200)
plt.subplot(313)
# Calculate the median and median absolute deviation (MAD)
steps_med = np.median(steps, axis=0)
steps_MAD = np.median(np.abs((steps-steps_med)), axis=0)
# steps_med = np.mean(steps, axis=0)
# steps_MAD = np.std(steps, axis=0)
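# Note: the conventional modified z-score includes a 0.6745 scale factor
# (Iglewicz & Hoaglin); omitting it here only rescales the scores uniformly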
for i_sensor in range(1,11):
z_score = (steps[i_sensor-1]-steps_med) / steps_MAD
# Need to shift by window size to figure out where the sensor was moved.
plt.plot(np.arange(len(z_score))+window_size, z_score, alpha=1, label='Sensor %i'%i_sensor)
plt.ylabel('Modified Z-Score')
plt.legend(loc=9, ncol=3, columnspacing=.2, labelspacing=.2, frameon=False)
plt.xlim(0,18000)
#plt.ylim(0,50)
In [ ]:
bins=np.linspace(0,3,31) # Velocity bins
v_threshold = 3. # Only include velocity ratios when both velocities are above this
bin_centers = 0.5*(bins[:-1] + bins[1:])
histograms = np.zeros((len(results), len(results), len(bins)-1))
median_ratios = np.zeros((len(results), len(results)))
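# median_ratios[i, j] will hold the median of v_i/v_j over times when both sensors
# see accumulation above the threshold, giving a pairwise rate comparison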
for i_sensor, (mu_i, sigma_i,) in enumerate(results):
# Get the velocities of the first set.
v_i = mu_i[:,1]
for j_sensor, (mu_j, sigma_j,) in enumerate(results):
if i_sensor == j_sensor:
continue
# Get the velocities of the second set.
v_j = mu_j[:,1]
        valid = np.where((v_i>v_threshold) & (v_j>v_threshold))[0] # Positive velocities only: we are looking at snowfall, not melt
ratios = (v_i[valid]/v_j[valid])
median_ratios[i_sensor, j_sensor] = np.median(ratios[~np.isnan(ratios)])
hist, bin_edges = np.histogram(v_i[valid]/v_j[valid], bins=bins, normed=True)
histograms[i_sensor, j_sensor] = hist
    # Replace NaN median ratios (pairs with no valid overlap) with 1
    median_ratios[i_sensor, np.isnan(median_ratios[i_sensor])] = 1.
np.set_printoptions(precision=3)
print median_ratios
In [348]:
plt.figure(figsize=(10,20))
for i_sensor in range(len(results)):
for j_sensor, hist in enumerate(histograms[i_sensor]):
#print len(hist)
if j_sensor < 5:
plt.subplot(len(results),2,2*i_sensor+1)
else:
plt.subplot(len(results),2,2*i_sensor+2)
plt.step(bin_centers, hist, alpha=.75, label='i,j = %i, %i'%(i_sensor+1, j_sensor+1))
plt.legend(frameon=False,fontsize=7, labelspacing=.2)
plt.xlabel('$v_i/v_j$')
In [358]:
#bins=np.linspace(0,3,50) # Velocity bins
v_threshold = 2. # Only include velocity ratios when both velocities are below this
bin_centers = 0.5*(bins[:-1] + bins[1:])
histograms = np.zeros((len(results), len(results), len(bins)-1))
median_ratios = np.zeros((len(results), len(results)))
for i_sensor, (mu_i, sigma_i,) in enumerate(results):
# Get the velocities of the first set.
v_i = mu_i[:,1]
for j_sensor, (mu_j, sigma_j,) in enumerate(results):
if i_sensor == j_sensor:
continue
# Get the velocities of the second set.
v_j = mu_j[:,1]
        valid = np.where((v_i<v_threshold) & (v_j<v_threshold))[0] # Low-velocity regime: both velocities below the threshold
hist, bin_edges = np.histogram(v_i[valid]/v_j[valid], bins=bins, normed=True)
ratios = (v_i[valid]/v_j[valid])
median_ratios[i_sensor, j_sensor] = np.median(ratios[~np.isnan(ratios)])
histograms[i_sensor, j_sensor] = hist
np.set_printoptions(precision=3)
print median_ratios
In [347]:
plt.figure(figsize=(10,20))
for i_sensor in range(len(results)):
for j_sensor, hist in enumerate(histograms[i_sensor]):
#print len(hist)
if j_sensor < 5:
plt.subplot(len(results),2,2*i_sensor+1)
else:
plt.subplot(len(results),2,2*i_sensor+2)
plt.step(bin_centers, hist, alpha=.75, label='i,j = %i, %i'%(i_sensor+1, j_sensor+1))
plt.legend(frameon=False,fontsize=7, labelspacing=.2)
plt.xlabel('$v_i/v_j$')